The PGC3 MDD study is including the following analyses to identify genes associated with MDD:
Show code
##########
# Nearest gene
##########
library(data.table)
# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread('../../docs/tables/meta_snps_full_eur.cojo.txt')
# Link SNPs to nearest gene
# Insert nearest gene information
library(biomaRt)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
ensembl = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes<-getBM(attributes=c('external_gene_name','chromosome_name','start_position','end_position'), mart = ensembl)
## Cache found
window<-50000
for(i in 1:nrow(indep_hits)){
Genes_i<-Genes[Genes$start_position < (indep_hits$BP[i] + window) & Genes$end_position > (indep_hits$BP[i] - window) & Genes$chromosome_name == indep_hits$CHR[i],]
if(nrow(Genes_i) != 0){
gene_string<-NULL
for(j in 1:nrow(Genes_i)){
if(indep_hits$BP[i] > Genes_i$start_position[j] & indep_hits$BP[i] < Genes_i$end_position[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
Dist=0,
Pos=NA))
}
if(indep_hits$BP[i] < Genes_i$start_position[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
Dist=abs(indep_hits$BP[i] - Genes_i$start_position[j]),
Pos='-'))
}
if(indep_hits$BP[i] > Genes_i$end_position[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
Dist=abs(indep_hits$BP[i] - Genes_i$end_position[j]),
Pos='+'))
}
}
gene_string<-gene_string[order(gene_string$Dist),]
indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
} else {
indep_hits$NearestGene[i]<-NA
}
}
##########
# SNP-finemapping
##########
# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread('../../docs/tables/finemapping/Locus_FineMapping_Full_Results.csv')
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
##########
# FastBAT
##########
# Read in FastBAT results
fastbat<-fread('../../docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat')
fastbat$P.FDR<-p.adjust(fastbat$Pvalue, method='fdr')
##########
# H-MAGMA
##########
hmagma<-fread('../../docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv')
# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]
# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')
Genes<-getBM(attributes=c('external_gene_name','ensembl_gene_id'), mart = ensembl)
## Cache found
Genes<-Genes[!duplicated(Genes$ensembl_gene_id),]
hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')
##########
# TWAS
##########
twas<-fread('../../docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt')
twas_sig<-twas[twas$TWAS.P < 3.685926e-08,]
##########
# Colocalisation
##########
coloc<-read.csv('../../docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv')
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]
##########
# FOCUS
##########
focus<-read.csv('../../docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv')
fusion <- fread("../../docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt")
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]
focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')
focus_sig<-focus[focus$FOCUS_pip > 0.5,]
##########
# Expression analysis based high confidence genes
##########
expression_highconf_res<-fread('../../docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv')
##########
# SMR
##########
# Read in the SMR results
smr_res<-list()
smr_res[['eQTLGen_Blood']]<-fread('../../docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv')
names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'
tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")
for(tissue_i in tissue){
smr_res[[tissue_i]]<-fread(paste0('../../docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv'))
smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}
smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")
smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]
##########
# HEIDI
##########
heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]
##########
# PWAS
##########
# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
if(i != 6){
pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i)))
} else {
pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i)))
pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC')))
}
}
pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('.*\\.','', pwas$ID)
# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread('../../docs/tables/pwas/rosmap_banner_pwas_smr_results.csv')
########
# PsyOPS
########
psyops <- fread('../../docs/tables/psyops/psyops_full_eur.cojo.txt')
psyops_prioritised<-NULL
for(i in unique(psyops$lead_variant)){
tmp<-psyops[psyops$lead_variant == i,]
tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
tmp<-tmp[tmp$distance == min(tmp$distance),]
psyops_prioritised<-rbind(psyops_prioritised, tmp)
}
psyops_genes <- psyops_prioritised$gene
This plot will show the number of genes returned by each analysis and the overlap of each analysis
Show code
## png
## 2
## png
## 2
Show UpSet plot of genes across all analyses
High-confidence genes
Show UpSet plot of genes across high-confidence analyses
High-confidence genes
Show code
## Warning in melt(high_conf_tab, id.vars = "ID"): The melt generic in data.table
## has been passed a data.frame and will attempt to redirect to the relevant
## reshape2 method; please note that reshape2 is deprecated, and this redirection
## is now deprecated as well. To continue using melt methods from reshape2 while
## both libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(high_conf_tab). In the next version, this warning will become an
## error.
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
## png
## 2
Show heatmap of high confidence associations
High-confidence genes
This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).
Show code
###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL
# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'
twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL
all_func_res<-rbind(all_func_res, twas_tmp)
# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_tmp)
# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)
pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_banner_tmp)
# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR
smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_rosmap_tmp)
# Subset to high confidence genes
all_func_res<-all_func_res[all_func_res$ID %in% high_conf,]
# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]
all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))
all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab$ID)))
all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'
all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))
# Create heatmap
library(ggplot2)
heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
theme_bw() +
geom_tile(aes(fill = Z), width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label=round(Z,1)), color="black", size=3) +
labs(title="High confidence genes across panels and methods") +
facet_wrap(~ Group , nrow=1, scales = "free_x")
group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
group_siz<-rbind(group_siz, data.frame(Group=i,
Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}
# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop
library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))
for(i in 1:nrow(group_siz)){
gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]
}
png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=2900)
grid.draw(gt)
a<-dev.off()
Show heatmap of high confidence associations across expression and protein datasets and methods
High-confidence genes